*******************************************

*			CLEAN AND EXPORT TAX ROLL	  *
*				And Building Data 		  *
*					FOR GIS				  *

*******************************************

clear
set more off
*cd "C:\Users\mrh105\Box\EDWIN_DEP_Well_Data\Washington_County_Project\Final Code\Building Data" /*PC*/
cd "C:\Users\maxha\Box\EDWIN_DEP_Well_Data\Washington_County_Project\Final Code\Building Data" /*Home PC*/ 

/*Tax Roll Out Buildings*/
import excel using "out bldgs.xlsx", sheet("Query Results") firstrow clear
	drop JUR MEAS1 MEAS2
	rename AREA SFLA  /*Note that this is not really Square Footage of Living Area, because outbuildings aren't assessed as having living areas. But this is the only available area measure for out buildings*/
	gen LUC= "Detached Out Building"
	gen type= "out"
	bysort PARID : gen CARD_OB= _n
	drop if SFLA<400 | SFLA==.

save WC_TaxRoll_2017_OutBuildings.dta, replace

/*Tax Roll Commercial Buildings*/
clear
import excel "com data.xlsx", sheet("Query Results") firstrow
	expand NUMIDENT /*Expanding by the number of identical structures variable*/ 
	drop STRUCTURE NUMIDENT
	gen LUC= "Commercial"
	gen type= "commercial"

save WC_TaxRoll_2017_Commercial.dta, replace

/*Tax Roll Main Buildings*/
clear
use WC_TaxRoll_2017.dta
	gen type= "main"
	sort PARID CARD
	duplicates drop PARID CARD CLASS SFLA YRBLT ADRNO STORIES BLDVAL, force
	drop if SFLA<400 | SFLA==.
	
*Append using out buildings and commercial buildings
	append using WC_TaxRoll_2017_OutBuildings.dta
	append using WC_TaxRoll_2017_Commercial.dta

	*Generate Decade of Year Built Variable
	tostring YRBLT, generate(YRBLT_str)
	gen YRBLT_dec= substr(YRBLT_str, 1,3)
	destring YRBLT_dec, replace
	gen YRBLT_decade= YRBLT_dec*10
	drop YRBLT_str YRBLT_dec
	
	*Dropping 1)Parcels Without Buildings and 2) Parcels With Buildings but Without YRBLT field
	replace BLDVAL=1 if YRBLT!=. & BLDVAL==0 /*These observations have build years so they should have some positive build value*/
	drop if BLDVAL==. & YRBLT==. /*Observations have no value or year data, which likely means they do not have buildings*/
	
	*Creating the PIN varabile from PARID To Use For Matching to the GIS Building Data
	gen PIN1= substr(PARID, 1,3)
	gen PIN2= substr(PARID, 4,3)
	gen PIN3= substr(PARID, 7,2)
	gen PIN4= substr(PARID, 9,2)
	gen PIN5= substr(PARID, 11,4)
	gen PIN6= substr(PARID, 15,.)
	gen PIN= PIN1+"-"+PIN2+"-"+PIN3+"-"+PIN4+"-"+PIN5+"-"+PIN6

*Creating the building size rankings for matching duplicates in the Tax Roll Data:
	duplicates tag PIN SFLA, gen(dup)
	replace SFLA=1000000 if dup>=1 & LUC=="Commercial" & SFLA==. /*Assuming that commerical buildings are larger than out buildings on parcels with both commerical and out buildings*/
	drop dup
	gsort PIN -SFLA -YRBLT /*Assuming more recent buildings are larger*/
	by PIN: gen size_rank=_n

save WC_TaxRoll_2017_Clean.dta, replace

*Creating the building size rankings for matching in the GIS Building Data:
clear
use WC_GIS_BuildingOutlines_2017.dta
	gsort PIN -Area_Calc OBJECTID
	by PIN: gen size_rank=_n
	drop if Area_Calc<37.1612 /*To stay consistent with small buildings dropped from out and main building data, dropping GIS outlines less than 400 square feet (37.1612 square meters).*/
	keep OBJECTID PIN size_rank
	bysort PIN: gen numobsGIS=_N
	
save WC_GIS_BuildingOutlines_2017_Merge.dta, replace

*Merging the Tax Roll Data and GIS Building Data:
clear
use WC_GIS_BuildingOutlines_2017_Merge.dta

	merge 1:1 PIN size_rank using WC_TaxRoll_2017_Clean.dta
 																								
*Cleaning the Merged Data	
	drop if YRBLT==.  /*Buildings from the GIS file have not been matched with a observation in the Tax Roll and have been dropped*/
	drop if OBJECTID==. 
						/*Observations in the tax roll file could not be matched with an ObjectID in the GIS data and have been dropped*/

	sort OBJECTID
	gen bldg_id= _n
	
	gen central_ac= (HEAT=="4:CENTRAL A/C")
		
save "C:\Users\maxha\Box\EDWIN_DEP_Well_Data\Washington_County_Project\Final Code\Forgone Value Calculations and Shifting Test\WC_TaxRoll_GIS_ForgoneValue.dta", replace

	keep bldg_id OBJECTID PARID PIN type YRBLT SFLA RMTOT RMBED FIXBATH central_ac 
	order bldg_id OBJECTID PARID PIN type YRBLT SFLA RMTOT RMBED FIXBATH central_ac
	
save WC_TaxRoll_GIS_BackMerge1.dta, replace
	
	keep bldg_id OBJECTID PARID PIN YRBLT

export delimited using "C:\Users\maxha\Box\EDWIN_DEP_Well_Data\Washington_County_Project\Final Code\Building Data\WashingtonCountyGISTaxRolldata.csv", replace   /*Export for Arc GIS Building Data*/

	*Cleaning Up*
	*erase WC_TaxRoll_GIS_ForgoneValue.dta
	*erase WC_TaxRoll_GIS_BackMerge1.dta
	erase WashingtonCountyGISTaxRolldata.csv
	erase WC_TaxRoll_2017_OutBuildings.dta
	erase WC_TaxRoll_2017_Commercial.dta
	erase WC_GIS_BuildingOutlines_2017_Merge.dta
	erase WC_TaxRoll_2017_Clean.dta
